********************************************************************************************************************************
***   Replication file for:                                                                                                  ***
***   Berbee, P., Braun, S. T. and Franke, R. (2024). Reversing Fortunes of German Regions, 1926-2019. JoEG.			     ***
***   							                                                                                             ***
***   SCRIPT: 	_x99_merge_create_label.do																					 ***	
***   PURPOSE: 	Reads in and merges data; creates, labels, and orders variables										 	 ***
********************************************************************************************************************************

* Preamble (unnecessary when executing run.do)
* Runs the project configuration script. The global $reversing used in every path
* below is presumably defined there or by run.do -- confirm.
run "$reversing/scripts/programs/_config.do"

************
* Code begins
************

*** Part A: Merge data set

* Base panel: GDP file, restricted to the variables needed here
use labor_market_id year population GDP gebietsstand empl* worker* using "$reversing/processed/intermediate/GDP.dta", clear 

* Stack the turnover files (1926, 1935, 1950, 1955) underneath the GDP observations
foreach y in 1926 1935 1950 1955 {
	append using "$reversing/processed/intermediate/turnover`y'.dta"
}

* Attach the deflators by year.
* m:1 (instead of the former m:m) makes Stata stop with an error if the deflator file
* ever contains more than one row per year; m:m would silently pair rows by position.
* With one row per year both forms give the same result.
merge m:1 year using "$reversing/processed/intermediate/GDPdeflator.dta", nogenerate keep(match master)

* Region-level files, all keyed by labor_market_id.
* The merge order is kept exactly as before because later commands drop variable
* ranges (e.g. "drop a - b") and use wildcards that depend on the variable order.
* spatialcontrols.dta used to be merged twice in a row; the second merge could not
* add anything (existing master values are never overwritten) and has been removed.
local region_files_a spatialcontrols iv_bairoch iv_daudin iv_daudin_lrivers iv_logdist ///
	occ_census1939 occ_census1950 exp_census1950 wardamage_census1950 sbz_census1961 ///
	occ_census1882_emp occ_census1882_pop occ_census1895 occ_census1907 ///
	VZ1950_87 census_2011 preindustrial patents_5y malost1945 rubble schengen election1957

foreach f of local region_files_a {
	merge m:1 labor_market_id using "$reversing/processed/intermediate/`f'.dta", nogenerate
}

* This file is read from the data folder, not from processed/intermediate
merge m:1 labor_market_id using "$reversing/data/majors_1950-1990.dta", nogenerate

local region_files_b students2019 universities pupil_teacher netmig_1950_61 netmig_1961_70 ///
	refugees2016 labor_market_ids

foreach f of local region_files_b {
	merge m:1 labor_market_id using "$reversing/processed/intermediate/`f'.dta", nogenerate
}

********************************************************************************

*** Part B: Create and label variables

********************************************************************************


*** Drop Saarland (no information is available for it early on and after WW2)

drop if labor_market_id == 103

*** GDP and turnover statistics ************************************************

** Population

* Zeros in the population variables are recoded to missing
foreach popvar in population pop_1950_hv {
	recode `popvar' 0 = .
}

* In 1950, population is taken from pop_1950_hv
replace population=pop_1950_hv if year==1950

* Real and per capita GDP
* (DM deflator for gebietsstand 1-5, EUR deflator for gebietsstand 6-7)
gen realGDP=100*GDP/DeflatorDM if inlist(gebietsstand,1,2,3,4,5)
replace realGDP=100*GDP/DeflatorEUR if inlist(gebietsstand,6,7)
gen pcGDP=1000000*GDP/population
gen realpcGDP=1000000*realGDP/population

* Rank of nominal GDP per capita within each year-gebietsstand cell
bysort year gebietsstand: egen pcGDP_ranking=rank(pcGDP)

* Turnover: real values, per capita values and ranking
gen realturnover = (100*turnover/DeflatorDM) 

* Scaling factor is 1000000 in 1955 and 1000 in every other year
* NOTE(review): presumably the 1955 turnover file is in different units -- confirm
local scale cond(year == 1955, 1000000, 1000)
gen turnoverpc = (`scale' * turnover / population )
gen realturnoverpc = (`scale' * realturnover / population )

label var realturnover "Real turnover (in 1992 prices)"
label var turnoverpc "Turnover per capita"
label var realturnoverpc "Real turnover per capita (in 1992 prices)"

sort year gebietsstand labor_market_id
bysort year gebietsstand: egen turnoverpc_ranking=rank(turnoverpc)
label var turnoverpc_ranking "Turnover per capita ranking"

* Nominal per capita GDP and the deflators are no longer needed
drop pcGDP DeflatorDM DeflatorEUR 

* Real GDP per worker, defined for 1961 only
gen realpwGDP = 1000000* realGDP / worker_t1961 if year==1961


* Rank percentile: GDP ranking where available, turnover ranking otherwise
* (Since 1957 based on GDP ranking, for earlier years based on turnover ranking)
* Note: xtile is computed over all observations in memory, not separately by year
gen rank = cond(pcGDP_ranking==., turnoverpc_ranking, pcGDP_ranking)
xtile rank_perc = rank, nq(100) 
label var rank_perc "Percentile rank in income per capita distribution"
drop rank

* Change in percentile rank
* For each reference year, spread that year's percentile rank to all rows of the region
foreach refyear in 1926 1957 2019 {
	gen help = rank_perc if year == `refyear'
	bysort labor_market_id: egen rank_perc_`refyear' = mean(help)
	drop help
}

gen change_perc_2619 = rank_perc_2019 - rank_perc_1926
gen change_perc_5719 = rank_perc_2019 - rank_perc_1957
label var change_perc_2619 "Percentile rank change in income per capita distribution, 1926-2019" 
label var change_perc_5719 "Percentile rank change in income per capita distribution, 1957-2019" 

drop rank_perc_1926 rank_perc_1957 rank_perc_2019

* Logs of real per capita GDP and turnover
gen log_realpcGDP = log(realpcGDP)
gen log_realpcturnover = log(realturnoverpc)
label var log_realpcGDP "Real per capita GDP (logarithm)"
label var log_realpcturnover "Real per capita turnover (logarithm)"


* Employment shares of industry, agriculture and services
* (denominator: production plus services employment)
foreach sector in prod_industry prod_agric serv_total {
	gen empshare_`sector'=empl_`sector'/(empl_prod_total+empl_serv_total)
}
label var empshare_prod_industry "Employment share in industry"
label var empshare_prod_agric "Employment share in agriculture"
label var empshare_serv_total "Employment share in services"

drop empl_prod_agric empl_prod_industry empl_prod_total empl_serv_total empl_serv_trade empl_serv_other


********************************************************************************
*** 1882 Industry structure ****************************************************


* Employment share of every 2-digit industry (relative to emp_Gesamt_1882).
* The label text is taken from the globals dLandw, dForstw, ..., which are not defined in this file.
foreach sector in Landw Forstw Fische Bergbau Torf Steine Metall Eisen Maschinen Chemisch ForstwNeben Textil PapierLeder Holz Nahrung Bekleidung Baugewerbe Polygr Kunst Fabrikanten Handel Versicherung Landverkehr Wasserverkehr Beherbergung HauslDienstl Staats OhneBeruf {
gen empshare_`sector'_1882=emp_`sector'_1882/emp_Gesamt_1882
label var empshare_`sector'_1882 "Empl. share in ${d`sector'} (1882)"
}

* Aggregate shares (these are employment shares - excluding family members):
*   ind  = mining and industry occupations
*   agr  = agrarian occupations (including Forstwirtschaft and fishery)
*   tert = residual, i.e. everything that is neither ind nor agr
local def_ind empshare_Bergbau_1882 + empshare_Torf_1882 + empshare_Steine_1882 ///
	+ empshare_Metall_1882 + empshare_Eisen_1882 + empshare_Maschinen_1882 ///
	+ empshare_Chemisch_1882 + empshare_ForstwNeben_1882 + empshare_Textil_1882 ///
	+ empshare_PapierLeder_1882 + empshare_Holz_1882 + empshare_Nahrung_1882
local def_agr empshare_Landw_1882 + empshare_Forstw_1882 + empshare_Fische_1882
local def_tert 1 - empshare_ind_1882 - empshare_agr_1882

* Each aggregate is followed by its version standardized within year-gebietsstand cells
foreach grp in ind agr tert {
	gen empshare_`grp'_1882 = (`def_`grp'')
	egen empshare_`grp'_1882_std=std(empshare_`grp'_1882), by(year gebietsstand)
}

label var empshare_ind_1882 "Employment share of mining and industry (excl. Gewerbe), 1882"
label var empshare_ind_1882_std "Employment share of mining and industry (excl. Gewerbe), 1882 (standardized)"

label var empshare_agr_1882 "Employment share of agriculture and forestry, animal husbandry, hunting, fishing, 1882"
label var empshare_agr_1882_std "Employment share of agriculture and forestry, animal husbandry, hunting, fishing, 1882 (standardized)"

label var empshare_tert_1882 "Employment share of tertiary occupations, 1882"
label var empshare_tert_1882_std "Employment share of tertiary occupations, 1882 (standardized)"



********************************************************************************
*** 1970/87 educational structure **********************************************

*** Education groups (low / medium / high)

* High:   tertiary education, i.e. a degree from a university or a university of applied sciences
* Medium: middle or higher secondary education (Mittlere Reife, Abitur or Berufsfachschule)
* Low:    at most lower secondary education (compulsory schooling only, no further degree)

* The shares can be normalized either by the population over 15 or by the sum over all categories.
* Neither is exact (e.g. pupils still in school, persons without a category).
* Normalizing by population means the low-skilled share is computed as a residual;
* apparently the choice of normalization makes little difference.

local adults70 population_ü15_1970

gen share_high_ed_1970 = 100 * (hochschule_1970) / (`adults70')
label var share_high_ed_1970 "High-skilled population share, 1970"
gen share_med_ed_1970 = 100 * (mhsreife_1970 + berufsfach_1970) / (`adults70')
label var share_med_ed_1970 "Medium-skilled population share, 1970"
gen share_low_ed_1970 = 100 - share_high_ed_1970 - share_med_ed_1970
label var share_low_ed_1970 "Low-skilled population share, 1970"
gen share_med_high_ed_1970 = share_high_ed_1970 + share_med_ed_1970
label var share_med_high_ed_1970 "Medium- and high-skilled population share, 1970"
gen popshare_noschooldegree_1970 = 100 * (`adults70' - volksschule_1970 - mhsreife_1970 - berufsfach_1970 - hochschule_1970) / `adults70'
label var popshare_noschooldegree_1970  "Population share without school degree, 1970"

* Unemployment in 1987: unemployed over unemployed plus employed
gen share_unemployed_1987=erwerbslose_1987/(erwerbslose_1987+erwerbst_1987)
label var share_unemployed_1987 "Unemployment share of population (1987)"

* Foreigner shares by census year (a count of zero foreigners is treated as missing)
foreach cy in 1961 1970 1987{
recode auslaender_`cy' 0 = .
gen popshare_foreign_`cy'=auslaender_`cy'/bevoelkerung_`cy'
label var popshare_foreign_`cy' "Population share foreigners (`cy')"
}

* Self-employed, apprentices and employment by sector
gen empshare_self_1950 = selfemployed_1950/erwerbst_1950
label var empshare_self_1950 "Employment share of self-employed, 1950"


* Sector shares of total employment for each census year.
* The three word lists are read in parallel: target suffix, source suffix, label text.
local targets indust agr serv
local sources prod landforst sonst
local names industry agriculture services
foreach cy in 1950 1961 1970 1987{
	forvalues k = 1/3 {
		local tgt : word `k' of `targets'
		local src : word `k' of `sources'
		local nm : word `k' of `names'
		gen empshare_`tgt'_`cy'=	erwerbst_`src'_`cy'/erwerbst_`cy'
		label var empshare_`tgt'_`cy' "Employment share `nm' (`cy')"
	}
}

* Industrial apprentices in 1970: relative to industry employment and per establishment
gen empshare_appr_ind_1970 = (lehrlinge_gewerblich_1970 / erwerbst_prod_1970)
gen indappr_per_establishment_1970 = lehrlinge_gewerblich_1970 / arbeitsstätten_prod_1970
label var empshare_appr_ind_1970 "Share of industrial apprentices over all employees in industry, 1970"
label var indappr_per_establishment_1970 "Average number of industrial apprentices per establishment in industry, 1970"
drop lehrlinge_gewerblich_1970 erwerbst_prod_1970 arbeitsstätten_prod_1970


* GDR refugee share
* NOTE(review): the factor 1000 suggests sbz_1961 is recorded in thousands -- confirm

gen popshare_gdr_1961 = sbz_1961 * 1000 / bevoelkerung_1961
label var popshare_gdr_1961 "Population share of GDR refugees, 1961"
drop sbz_1961

* Variables from the 2011 census
* (the creation order of the *2011 variables is kept; a later order command uses the wildcard *2011)

* Vocational degrees, relative to persons_voc_total
local voc_total persons_voc_total
gen popshare_academics_2011=(persons_voc_fachochschule+ persons_voc_university+persons_voc_phd)/`voc_total'
gen popshare_dualeausb_2011=persons_voc_dualeausbildung/`voc_total'
gen popshare_voctraining_2011=(persons_voc_fachschule+persons_voc_fachakdemie+persons_voc_dualeausbildung)/`voc_total'
gen popshare_novocdegree_2011=persons_voc_nodegree/`voc_total'

label var popshare_academics_2011 "Population share with university or Fachochschuldegree (2011)"
label var popshare_dualeausb_2011 "Population share with Duale Ausbildung (2011)"
label var popshare_voctraining_2011 "Population share with any vocational training (2011)"
label var popshare_novocdegree_2011 "Population share with no professional degree (2011)"

* Skill groups in percent, named like the 1970 variables
gen share_high_ed_2011 = popshare_academics_2011 * 100
gen share_med_ed_2011 = popshare_voctraining_2011 * 100
gen share_low_ed_2011 = popshare_novocdegree_2011 * 100
gen share_med_high_ed_2011 = share_med_ed_2011 + share_high_ed_2011

label var share_high_ed_2011 "High-skilled population share, 2011"
label var share_med_ed_2011 "Medium-skilled population share, 2011"
label var share_low_ed_2011 "Low-skilled population share, 2011"
label var share_med_high_ed_2011 "Medium- and high-skilled population share, 2011"


* School degrees, relative to persons_educ_total
* (only the no-degree share is multiplied by 100; the other three are fractions)
local educ_total persons_educ_total
gen popshare_abitur_2011=(persons_educ_inoberstufe+ persons_educ_fachochschulr+ persons_educ_hochschulreife)/ `educ_total'
gen popshare_mreife_2011=persons_educ_mittlerereife/ `educ_total'
gen popshare_hschule_2011=persons_educ_hauptschule/`educ_total'
gen popshare_noschooldegree_2011=100 * persons_educ_nodegree/`educ_total'

label var popshare_abitur_2011 "Population share with Hochschulreife (2011)"
label var popshare_mreife_2011 "Population share with Mittlere Reife(2011)"
label var popshare_hschule_2011 "Population share with Hauptschulabschluss (2011)"
label var popshare_noschooldegree_2011 "Population share with no school degree (2011)"

* Patents per 1000 persons for each 5-year window.
* Default denominator: population in the last year of the window.
* Exceptions: 1878-1882, 1903-1907 and 1983-1987 use a census population variable
* (defined on every row), and 2002-2006 additionally requires gebietsstand==7.
foreach period in 1878_1882 1903_1907 1922_1926 1931_1935 1951_1955 1962_1966 1970_1974 1983_1987 1992_1996 2002_2006 2012_2016 {
	local lastyear = substr("`period'", 6, 4)
	local denom "population"
	local sample "if year==`lastyear'"
	if "`period'" == "1878_1882" {
		local denom "pop_Gesamt_1882"
		local sample ""
	}
	else if "`period'" == "1903_1907" {
		local denom "total_pop_1907"
		local sample ""
	}
	else if "`period'" == "1983_1987" {
		local denom "bevoelkerung_1987"
		local sample ""
	}
	else if "`period'" == "2002_2006" {
		local sample "if year==2006 & gebietsstand==7"
	}
	gen _patents_`period'=pat_`period'/(`denom' / 1000) `sample'
}

* Spread each value to all rows of the region (max ignores the missing rows),
* then remove the year-specific helper variable
sort labor_market_id year gebietsstand 
foreach helper of varlist _patents_????????? {
by labor_market_id: egen pc`helper'=max(`helper')
drop `helper'
}
 
**** Refugee share 2016
* Asylum seekers in 2016 over population, spread to all rows of the region
* NOTE(review): the denominator is the population of year 2015, not 2016 -- confirm this is intended

gen help = asylum_seekers_2016 / population if year == 2015
bysort labor_market_id: egen refugee_share_2016 = mean(help)
drop help
label var refugee_share_2016 "Refugee share in 2016"




********************************************************************************
*** Create indicators for preindustrial development ****************************

* Towns per unit of area (cities with town rights)
foreach century in 1800 1700 {
	gen town_`century'_perarea = town_`century' / area
}


********************************************************************************
*** Different definitions of what we mean by "northern Germany" ****************

* north1: South includes BY, BW, RLP, HE (state_id 6-9); everything else is north
gen north1 = !inlist(state_id,6,7,8,9)
*gen south1=(north1==0)
label var north1 "North, narrow definition (0/1)"

* north2: South includes BY, BW, Rhein-Main & Pfalz (Geographical/cultural borders)
* i.e. state_id 8 and 9 plus the listed labor markets
gen north2 = !(inlist(state_id,8,9) | inlist(labor_market_id,86,87,97,98,99,100,104,105,120,121,122))
label var north2 "North, broad definition (0/1)"

* north3: split at mean latitude (standardized latitude above zero)
* Note: the mean is taken over all rows in memory, and a missing latitude_norm
* compares as greater than zero in Stata, so such rows are coded 1
egen latitude_norm=std(latitude)
gen north3 = latitude_norm > 0
label var north3 "North, split at mean latitude (0/1)"

* Dummy for Ruhrgebiet
* (Duisburg, Essen, Recklinghausen, Bochum, Dortmund, Hamm-Beckum)
gen ruhrgebiet = inlist(labor_market_id,44,45,46,47,48,49)
label var ruhrgebiet "Ruhrgebiet (0/1)"

********************************************************************************
*** Different versions of the Instruments (log coal access) ********************

* Logs of the access measures for both transport-cost sources.
* New variables carry the 3-letter suffix bai / dau. The creation order is kept
* (five base measures, then the two PWA variants) because a later order command
* uses wildcards such as log_coal_*.
foreach source in bairoch daudin {
	local abbr = substr("`source'", 1, 3)
	foreach measure in coal_access1 coal_access2 carb_access1 land_access1 land_access2 {
		gen log_`measure'_`abbr' = log(`measure'_`source')
	}
	foreach variant in PWA1 PWA2 {
		gen log_coal_access1_`abbr'_`variant' = log(coal_access1_`source'_`variant')
	}
}

label var log_coal_access1_bai "Coal area (sqm) weighted by Bairoch transport costs (logs)"
label var log_coal_access2_bai "Coal area (sqm) weighted by Bairoch transport costs squared (logs)"
label var log_carb_access1_bai "Carboniferous strata area (sqm) weighted by Bairoch transport costs (logs)"
label var log_land_access1_bai "Land access weighted by Bairoch transport costs (logs)"
label var log_land_access2_bai "Land access weighted by Bairoch transport costs squared"
label var log_coal_access1_bai_PWA1 "Coal area (sqm) weighted by Bairoch transport costs, based on PWA 1967 (logs)"
label var log_coal_access1_bai_PWA2 "Coal area (sqm) weighted by Bairoch transport costs, based on PWA 1967 and FR2020 (logs)"

label var log_coal_access1_dau "Coal area (sqm) weighted by Daudin transport costs (logs)"
label var log_coal_access2_dau "Coal area (sqm) weighted by Daudin transport costs squared (logs)"
label var log_carb_access1_dau "Carboniferous strata area (sqm) weighted by Daudin transport costs (logs)"
label var log_land_access1_dau "Land access weighted by Daudin transport costs (logs)"
label var log_land_access2_dau "Land access weighted by Daudin transport costs squared"
label var log_coal_access1_dau_PWA1 "Coal area (sqm) weighted by Daudin transport costs, based on PWA 1967 (logs)"
label var log_coal_access1_dau_PWA2 "Coal area (sqm) weighted by Daudin transport costs, based on PWA 1967 and FR2020 (logs)"

* Daudin costs restricted to larger rivers
foreach measure in coal_access1 land_access1 {
	gen log_`measure'_lrivers_dau = log(`measure'_lrivers_daudin)
}
 
label var log_coal_access1_lrivers_dau "Coal area (sqm) weighted by Daudin transport costs, larger rivers only (logs)"
label var log_land_access1_lrivers_dau "Land access weighted by Daudin transport costs, larger rivers only (logs)"

* Remove the level variables listed here (the Daudin coal and land access levels are not in the list and stay)
drop coal_access1_bairoch coal_access2_bairoch carb_access1_bairoch coal_access1_bairoch_PWA1 coal_access1_bairoch_PWA2 land_access1_bairoch land_access2_bairoch carb_access1_daudin coal_access1_daudin_PWA1 coal_access1_daudin_PWA2 coal_access1_lrivers_daudin land_access1_lrivers_daudin


********************************************************************************
*** Label political outcomes					    ****************************

* Years in office of the Oberbürgermeister by party
foreach party in CDU CSU SPD {
	label var ob_years_`party' "Years major (Oberbürgermeister) was member of `party'"
}
label var ob_years_others "Years major (Oberbürgermeister) was member of other party"

* Give the dominant-party duration a name in line with the other ob_years_* variables
rename duration_longestparty ob_years_dominant_party
label var ob_years_dominant_party "Years major (Oberbürgermeister) was member of the dominant party"


********************************************************************************
*** University and students 					    ****************************

* Number of universities
* For every reference year: missing counts are set to zero, then 0/1 indicators are
* built for any university (unis_), any larger university (lunis_) and any
* university with Promotionsrecht (unis_wphd_).

foreach t in 1900 1910 1920 1930 1940 1950 1960 1970 1980 1990 2000 2010 2015{
	recode unis_`t' . = 0 
	recode unis_wphd_`t' . = 0
	recode lunis_`t' . = 0
	
	gen d_uni_`t' = 0 if unis_`t' == 0
	replace d_uni_`t' = 1 if unis_`t' >= 1
	label var d_uni_`t' "At least one university in `t' (0/1)?"
	
	* Fix: this indicator was previously built from unis_`t' and therefore merely
	* duplicated d_uni_`t'; it must be based on the count of larger universities.
	gen d_luni_`t' = 0 if lunis_`t' == 0
	replace d_luni_`t' = 1 if lunis_`t' >= 1
	label var d_luni_`t' "At least one larger university in `t' (0/1)?"
	
	gen d_unis_wphd_`t' = 0 if unis_wphd_`t' == 0
	replace d_unis_wphd_`t' = 1 if unis_wphd_`t' >= 1
	label var d_unis_wphd_`t' "At least one university with Promotionsrecht in `t' (0/1)?"

	
	}


* Student composition by field

* Share of each field in all students (in percent); missing field counts are set to zero
foreach x in humanities law_econ math_natsc medicine engineer other{
	recode students_`x'_2019 . = 0
	gen share_stud_`x'_2019 = 100 * (students_`x'_2019 / students_total_2019)
}

* Students per 100 inhabitants; defined on the 2019 rows only
foreach x in humanities law_econ math_natsc medicine engineer other total{
	gen stud_`x'_pop_2019 = 100 * (students_`x'_2019 / population) if year == 2019
}



label var share_stud_humanities_2019 "Share of students in the humanities, 2019"
label var share_stud_law_econ_2019 "Share of students in law, economics, social sciences, 2019"
label var share_stud_math_natsc_2019 "Share of students in maths and natural sciences, 2019"
label var share_stud_medicine_2019 "Share of students in medicine, 2019"
label var share_stud_engineer_2019 "Share of students in engineering, 2019"
label var share_stud_other_2019 "Share of students in other fields, 2019"

label var stud_humanities_pop_2019 "Students in the humanities over population, 2019"
label var stud_law_econ_pop_2019 "Students in law, economics, social sciences over population, 2019"
label var stud_math_natsc_pop_2019 "Students in maths and natural sciences over population, 2019"
label var stud_medicine_pop_2019 "Students in medicine over population, 2019"
label var stud_engineer_pop_2019 "Students in engineering over population, 2019"
label var stud_other_pop_2019 "Students in other fields over population, 2019"
label var stud_total_pop_2019 "Students over population, 2019"
 
* The raw student counts are no longer needed
drop students_*

********************************************************************************
*** Net internal migration

* Net migration over each span as a share of the population in the span's first year,
* spread to all rows of the region. The 1950-70 figure is the sum of the two sub-spans.
foreach span in 1950_61 1961_70 1950_70 {
	local baseyear = substr("`span'", 1, 4)
	local flow "net_mig_`span'"
	if "`span'" == "1950_70" {
		local flow "(net_mig_1950_61 + net_mig_1961_70)"
	}
	gen help = `flow' / population if year == `baseyear'
	bysort labor_market_id: egen share_net_mig_`span' = mean(help)
	drop help
}

label var share_net_mig_1950_61 "Net migration 1950-61 (share of 1950 population)"
label var share_net_mig_1961_70 "Net migration 1961-70 (share of 1961 population)"
label var share_net_mig_1950_70 "Net migration 1950-70 (share of 1950 population)"

drop net_mig_1950_61 net_mig_1961_70

********************************************************************************
*** Create additional control variables ****************************************

* Taking logs
* (log() returns missing for zero or negative arguments)
gen log_dist_coast = log(dist_coast)
gen log_dist_river = log(dist_river)
*gen log_DistRiver_river2 = log(dist_river_river2)
gen log_distGGB = log(dist_GGborder)
gen log_elevation = log(elevation)
*gen log_pop_1882=log(pop_Gesamt_1882)
gen log_dist_Eborder = log(dist_Eborder)
gen log_dist_Wborder = log(dist_Wborder)
gen log_dist_schengen_1995 = log(dist_schengen_1995)
gen log_dist_schengen_2008 = log(dist_schengen_2008)

label var dist_coast "Distance to coast (km)"
label var log_dist_coast "Distance to coast (km), logs"
label var dist_river "Distance to nearest major river"
label var log_dist_river "Distance to nearest major river, logs"
label var log_elevation "Elevation, logs"
label var log_distGGB "Distance to inner-German border, logs"
label var log_dist_Eborder "Distance to Germany's eastern border, logs"
label var log_dist_Wborder "Distance to Germany's western border, logs"
label var log_dist_schengen_1995 "Distance to Schengen area border in 1995, logs"
label var log_dist_schengen_2008 "Distance to Schengen area border in 2008, logs"

* dummy for border or coast
* Fix: the label used to read "Located at coast (0/1)", which contradicts the
* variable's definition (border OR coast) and its name.
gen bordercoast=inlist(labor_market_id,1,2,4,5,6,8,9,10,14,16,17,26,27,39,40,41,42,43,65,66,81,90,91,101,102,112,119,120,121,122,134,135,136,145,146,147,148,152,153,154,155,156,158,159,161,162,163,164)
label var bordercoast "Located at border or coast (0/1)"

* dummy for inner german border
gen innergermanborder=inlist(labor_market_id,6,11,12,20,21,22,31,54,55,56,75,76,77,88,89,90,94,95)
label var innergermanborder "Located at inner-German border"

* dummy for any border (including inner German border)
*gen borderdummy= bordercoast==1 | innergermanborder==1

* Rubble per capita
* Missing rubble is treated as zero; the amount is divided by the 1935 population
* (in thousands) and the result is spread to all rows of the region.
recode rubble_unloosened . = 0
gen help = rubble_unloosened / (population / 1000) if year == 1935
bysort labor_market_id: egen rubble_per_capita_1946 = mean(help)
label var rubble_per_capita_1946 "Untreated rubble at the end of the war over the pre-war population"
drop rubble_unloosened help




********************************************************************************
*** C. Label and order *********************************************************

* This part arranges the variables into thematic groups, adds the remaining labels
* and drops raw inputs that are no longer needed.
* Every group starts with an all-missing variable named ___<Group>___ that is placed
* first in the group's order command (presumably as a visual divider in the variable list).
* The statements depend on each other's position: each order command anchors on the
* last variable of the previous group via after(), and several drop commands remove
* ranges of variables ("a - b"), which depend on the variable order at that point.

sort labor_market_id year



* Identifiers: moved to the very front of the dataset
gen ___IDs__________________________=.
order ___IDs__________________________ labor_market_id labor_market_name labor_market_center rb_id rb_name state_id state_name state_1834_id gebietsstand year, first
label var gebietsstand "# of Destatis publication GDP data is based on" 
label var labor_market_id "Labor market ID"
label var year "Year"

* GDP and turnover outcomes
gen ___GDP_Turnover_________________ = .
order ___GDP_Turnover_________________ GDP population realGDP realpcGDP realpwGDP log_realpcGDP pcGDP_ranking empshare_prod_industry empshare_prod_agric empshare_serv_total turnover turnoverpc realturnover realturnoverpc log_realpcturnover turnoverpc_ranking rank_perc change_perc_2619 change_perc_5719, after(year)
label var GDP "Nominal GDP (currency as in destatis publications)"
label var turnover "Nominal turonver (currency as in Stat. Reichsamt publications)"
label var population "Population"
label var realGDP "Real GDP (in 1992 DM)"
label var realpcGDP "Real GDP per capita (in 1992 DM)"
label var realpwGDP "Real GDP per worker (in 1992 DM)"
label var pcGDP_ranking "GDP per capita rank"

* Instruments; the wildcards expand in the current variable order
gen ___Instruments_________________ =.
order ___Instruments_________________ log_coal_* coal_* log_carb_*, after(change_perc_5719)

* Baseline instrument: Daudin (and associated land access measure)
* The renames drop the _dau suffix from the two baseline variables
rename log_coal_access1_dau log_coal_access1  
rename log_land_access1_dau log_land_access1  

* Spatial controls
gen ___Spatial_____________ = .
order ___Spatial_____________ longitude latitude area dist_carbon dist_coast log_dist_coast dist_river log_dist_river bordercoast log_land* land_* soil_quality elevation log_elevation ruggedness  sun9120 hotdays9120 temp9120 remote_KS2014 zollverein_1834 log_dist_schengen_1995 dist_schengen_1995 d_schengen_1995 log_dist_schengen_2008 dist_schengen_2008 d_schengen_2008 north1 north2 north3 ruhrgebiet, after(log_carb_access1_dau)
drop hanse

* WW2 and division of Germany
gen ___WW2_Division_____________ = .
order ___WW2_Division_____________ log_dist_Eborder dist_Eborder log_dist_Wborder dist_Wborder dist_GGborder log_distGGB innergermanborder  market_access_lost_1945 redevelopment_area rubble_per_capita_1946, after(ruhrgebiet) 

* 1882 census: keep the aggregate shares and total population, drop the
* 2-digit shares, the raw employment counts and the sectoral population counts
gen ___1882_Census________________ = .
rename pop_Gesamt_1882 pop_total_1882
label var pop_total_1882 "Population in 1882"
order ___1882_Census________________ empshare_ind_1882 empshare_ind_1882_std empshare_agr_1882 empshare_agr_1882_std empshare_tert_1882 empshare_tert_1882_std pop_total_1882, after(rubble_per_capita_1946)
drop empshare_Landw_1882- empshare_OhneBeruf_1882 emp_*1882
drop pop_Landw_1882 - pop_Handel_1882

* 1895 and 1907 censuses
gen ___1895_1907_Census_____________ = .
order ___1895_1907_Census_____________ empshare_ind_1907 empshare_ind_1907_std empshare_ind_1895 empshare_ind_1895_std  firmsize_industry_1907 empshare_f201_ind_1907 popshare_f201_ind_1907 empshare_f501_ind_1907 popshare_f501_ind_1907 empshare_f1000_ind_1907 popshare_f1000_ind_1907 empshare_hhi_ind_1907 total_pop_1907, after(pop_total_1882)

* 1939 census
gen ___1939_Census________________ = .
order ___1939_Census________________ empshare_agr_1939 empshare_ind_1939 empshare_serv_1939, after(total_pop_1907) 

* 1950 census: the expellee share is computed here from expellees_1950 and pop_1950_hv,
* after which both inputs are dropped
gen ___1950_Census____________ =.
gen expellee_share_1950 = expellees_1950 / pop_1950_hv
label var expellee_share_1950 "Expellee share 17/09/1950"
drop expellees_1950 pop_1950_hv
order ___1950_Census____________ expellee_share_1950 share_damaged_flats_1950 empshare_ind_1950 empshare_montan_1950 empshare_modernindustries_1950 empshare_otherindustry_1950 hhi_industryemp_1950, after(empshare_serv_1939)

* 1950-1987 censuses
gen ___1950_1987_Censuses_______ =.
order ___1950_1987_Censuses_______  popshare_foreign_1961 popshare_foreign_1970 popshare_foreign_1987 popshare_gdr_1961 share_net_mig_1950_61 share_net_mig_1961_70 share_net_mig_1950_70 share_high_ed_1970 share_med_ed_1970 share_low_ed_1970 share_med_high_ed_1970 popshare_noschooldegree_1970 empshare_self_1950 empshare_indust_1950 empshare_agr_1950 empshare_serv_1950 empshare_indust_1961 empshare_agr_1961 empshare_serv_1961 empshare_indust_1970 empshare_agr_1970 empshare_serv_1970 empshare_appr_ind_1970 indappr_per_establishment_1970 share_unemployed_1987 empshare_indust_1987 empshare_agr_1987 empshare_serv_1987,  after(hhi_industryemp_1950) 

* Drops every variable positioned between volksschule_1970 and lehrlinge_1987
drop volksschule_1970 - lehrlinge_1987
drop lehrlinge_gewerblich_1987

* 2011 census: *2011 picks up every variable whose name ends in 2011
gen ___2011_Census________________ =.
order ___2011_Census________________ *2011, after(empshare_serv_1987)
* Drops every variable positioned between persons_educ_total and population_arrival_before90
drop persons_educ_total - population_arrival_before90

* Political outcomes
gen ___Political________________ =.
order ___Political________________ voteshare_SPD_1957 ob_years_CDU ob_years_CSU ob_years_SPD ob_years_others ob_years_dominant_party, after(popshare_noschooldegree_2011)

* Preindustrial development
gen ___Preindustrial_development___ = .
order ___Preindustrial_development___ town_1800_perarea town_1700_perarea, after(ob_years_dominant_party) 
label var town_1800_perarea "No of towns per area in 1800"
label var town_1700_perarea "No of towns per area in 1700"

drop cities_total town_1800 town_1700 markets_1800 markets_1700 market_places_1700 market_places_1800 new_const_1700_1800 new_const_1700_1750  

* Patents per capita; the raw patent counts (pat_*) are dropped
gen ___Patents_____________________ =.
order ___Patents_____________________  pc_patents_1878_1882 pc_patents_1903_1907 pc_patents_1922_1926 pc_patents_1931_1935 pc_patents_1951_1955 pc_patents_1962_1966 pc_patents_1970_1974 pc_patents_1983_1987 pc_patents_1992_1996 pc_patents_2002_2006 pc_patents_2012_2016, after(town_1700_perarea)
drop pat_*

foreach period in 1878_1882 1903_1907 1922_1926 1931_1935 1951_1955 1962_1966 1970_1974 1983_1987 1992_1996 2002_2006 2012_2016 {
label var pc_patents_`period' "Patents per 1000 persons in `period' (patentcity)"
}

* Universities, students and pupil-teacher ratios
* (d_uni* also matches the d_unis_wphd_* indicators)
gen ___University_students________ =.
order ___University_students________ d_uni* unis_* d_luni* lunis_* share_stud_* stud_* pupil_teacherf_prim_2019 pupil_teacherf_sec_2019 pupil_teacher_prim_2019 pupil_teacher_sec_2019, after(pc_patents_2012_2016)


* Other variables; every variable whose name starts with asylum is dropped
gen ___Other____________________ = .
order ___Other____________________ refugee_share_2016, after(pupil_teacher_sec_2019)
drop asylum*



********************************************************************************
*** Part D. Drop superfluous observations                                    ***

********************************************************************************
*** Keep always most recent GDP estimate for a year ****************************
*** Exception: 1961 so as to use consistent source for 1957-66
*** Exception: 1992 so as to avoid a break in datasource 1980-1992

keep if ///
inlist(year,1926,1935,1950,1955) | ///
gebietsstand==1 & inrange(year,1957,1966) | ///
gebietsstand==2 & inrange(year,1968,1968) | ///
gebietsstand==3 & inrange(year,1970,1974) | ///
gebietsstand==4 & year==1978 | ///
gebietsstand==5 & inrange(year,1980,1992) | ///
gebietsstand==6 & inrange(year,1993,1999) | ///
gebietsstand==7 & inrange(year,2000,2019)


* Order and save

sort labor_market_id year
save "$reversing/processed/workingdataset.dta", replace

*** EOF







